Regression Models Course Project

row.names(mtcars)
##  [1] "Mazda RX4"           "Mazda RX4 Wag"       "Datsun 710"         
##  [4] "Hornet 4 Drive"      "Hornet Sportabout"   "Valiant"            
##  [7] "Duster 360"          "Merc 240D"           "Merc 230"           
## [10] "Merc 280"            "Merc 280C"           "Merc 450SE"         
## [13] "Merc 450SL"          "Merc 450SLC"         "Cadillac Fleetwood" 
## [16] "Lincoln Continental" "Chrysler Imperial"   "Fiat 128"           
## [19] "Honda Civic"         "Toyota Corolla"      "Toyota Corona"      
## [22] "Dodge Challenger"    "AMC Javelin"         "Camaro Z28"         
## [25] "Pontiac Firebird"    "Fiat X1-9"           "Porsche 914-2"      
## [28] "Lotus Europa"        "Ford Pantera L"      "Ferrari Dino"       
## [31] "Maserati Bora"       "Volvo 142E"

Helper functions to compute PRESS, predicted R-squared, a table of fit statistics, and the overall model p-value

# PRESS (prediction sum of squares) of a fitted linear model.
#
# linear.model: a fitted model accepted by residuals() and lm.influence().
# Returns a single unnamed number.
PRESS <- function(linear.model) {
  # Predictive residuals: each ordinary residual divided by one minus that
  # observation's hat value (leverage).
  leverage <- lm.influence(linear.model)$hat
  predictive_resid <- residuals(linear.model) / (1 - leverage)

  # PRESS is the sum of the squared predictive residuals.
  sum(predictive_resid^2)
}
# Predicted R-squared of a fitted linear model: 1 - PRESS / total sum of squares.
#
# linear.model: a fitted model accepted by anova() and PRESS().
# Returns a single number.
pred_r_squared <- function(linear.model) {
  # Adding up every row of the ANOVA "Sum Sq" column (all model terms plus the
  # residual row) gives the total sum of squares.
  sum_sq_column <- anova(linear.model)[["Sum Sq"]]
  total_ss <- sum(sum_sq_column)

  1 - PRESS(linear.model) / total_ss
}
# One-row data frame of fit statistics for a linear model.
#
# linear.model: a fitted model accepted by summary(), pred_r_squared() and
#   PRESS().
# Returns a data.frame with columns r.squared, adj.r.squared, pred.r.squared
# and press.
model_fit_stats <- function(linear.model) {
  fit_summary <- summary(linear.model)

  data.frame(
    r.squared = fit_summary$r.squared,
    adj.r.squared = fit_summary$adj.r.squared,
    pred.r.squared = pred_r_squared(linear.model),
    press = PRESS(linear.model)
  )
}
# Overall p-value of a linear model's F-test.
#
# modelobject: an object inheriting from class "lm".
# Returns the upper-tail probability of the model F-statistic as a plain
# number with no attributes.
# Errors if modelobject does not inherit from "lm", or if its summary carries
# no F-statistic (for example an intercept-only fit).
lmp <- function(modelobject) {
  # inherits() accepts subclasses of "lm" and always yields a single
  # TRUE/FALSE; comparing class() with != gives a vector condition whenever
  # the object has more than one class.
  if (!inherits(modelobject, "lm")) stop("Not an object of class 'lm' ")

  # fstatistic holds the F value, numerator df and denominator df, in that
  # order.
  fstat <- summary(modelobject)$fstatistic
  if (is.null(fstat)) {
    stop("Model summary has no F-statistic (e.g. intercept-only fit)")
  }

  p_value <- pf(fstat[1], fstat[2], fstat[3], lower.tail = FALSE)

  # Drop the name inherited from the fstatistic vector.
  as.numeric(p_value)
}

Data and library load, integrity check, and summary

library(datasets)
library(ggplot2)
library(plyr)
data(mtcars)
sum(is.na(mtcars))
## [1] 0
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000

Explore the data

# Histogram of every variable in mtcars.
#
# The former par(mfrow = c(2, 2)) calls were dropped: par() only controls base
# graphics and does not lay out ggplot2 plots. qplot() is deprecated, so each
# plot is built with ggplot() + geom_histogram().
#
# Bin widths are set per variable. A width of 0.5 is kept where it suits the
# scale; mpg (10.4-33.9), disp (71.1-472) and hp (52-335) get wider bins,
# because 0.5 would leave almost every bin empty for 32 cars.
ggplot(mtcars, aes(x = mpg)) + geom_histogram(binwidth = 2)

ggplot(mtcars, aes(x = wt)) + geom_histogram(binwidth = 0.5)

ggplot(mtcars, aes(x = cyl)) + geom_histogram(binwidth = 0.5)

ggplot(mtcars, aes(x = disp)) + geom_histogram(binwidth = 50)

ggplot(mtcars, aes(x = hp)) + geom_histogram(binwidth = 25)

ggplot(mtcars, aes(x = carb)) + geom_histogram(binwidth = 0.5)

ggplot(mtcars, aes(x = qsec)) + geom_histogram(binwidth = 0.5)

ggplot(mtcars, aes(x = gear)) + geom_histogram(binwidth = 0.5)

ggplot(mtcars, aes(x = am)) + geom_histogram(binwidth = 0.5)

ggplot(mtcars, aes(x = vs)) + geom_histogram(binwidth = 0.5)

ggplot(mtcars, aes(x = drat)) + geom_histogram(binwidth = 0.5)

wt vs mpg

# Scatter of mpg against wt with a loess smoother. Layers are built explicitly:
# qplot() is deprecated and passes `method` to every geom it creates,
# including geom_point(), which does not use it.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x)

disp vs mpg

# Scatter of mpg against disp with a loess smoother. Layers are built
# explicitly: qplot() is deprecated and passes `method` to every geom it
# creates, including geom_point(), which does not use it.
ggplot(mtcars, aes(x = disp, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x)

hp vs mpg

# Scatter of mpg against hp with a loess smoother. Layers are built explicitly:
# qplot() is deprecated and passes `method` to every geom it creates,
# including geom_point(), which does not use it.
ggplot(mtcars, aes(x = hp, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x)

qsec vs mpg

# Scatter of mpg against qsec with a loess smoother. Layers are built
# explicitly: qplot() is deprecated and passes `method` to every geom it
# creates, including geom_point(), which does not use it.
ggplot(mtcars, aes(x = qsec, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x)

drat vs mpg

# Scatter of mpg against drat with a loess smoother. Layers are built
# explicitly: qplot() is deprecated and passes `method` to every geom it
# creates, including geom_point(), which does not use it.
ggplot(mtcars, aes(x = drat, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x)

Frequency tables of the discrete variables (am, cyl, vs, gear, carb)

table(mtcars$am)
## 
##  0  1 
## 19 13
table(mtcars$cyl)
## 
##  4  6  8 
## 11  7 14
table(mtcars$vs)
## 
##  0  1 
## 18 14
table(mtcars$gear)
## 
##  3  4  5 
## 15 12  5
table(mtcars$carb)
## 
##  1  2  3  4  6  8 
##  7 10  3 10  1  1

Look at mpg for (0) automatic and (1) manual transmissions

boxplot(mpg ~ factor(am), data = mtcars, xlab="transmission", ylab="mpg")

mean(subset(mtcars, am == 0)$mpg)
## [1] 17.14737
mean(subset(mtcars, am == 1)$mpg)
## [1] 24.39231

lm01 - Fit a model with mpg as the response and transmission (am, as a factor) as the only predictor

lm01 <- lm(mpg ~ factor(am), data = mtcars)
summary(lm01)
## 
## Call:
## lm(formula = mpg ~ factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3923 -3.0923 -0.2974  3.2439  9.5077 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   17.147      1.125  15.247 1.13e-15 ***
## factor(am)1    7.245      1.764   4.106 0.000285 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.902 on 30 degrees of freedom
## Multiple R-squared:  0.3598, Adjusted R-squared:  0.3385 
## F-statistic: 16.86 on 1 and 30 DF,  p-value: 0.000285
pred_r_squared(lm01)
## [1] 0.2626133
par(mfrow=c(2,2))
plot(lm01)

Correlations

sort(cor(mtcars)[1,]) # wt,cyl,disp,hp,carb,qsec,gear,am,vs,drat,mpg
##         wt        cyl       disp         hp       carb       qsec 
## -0.8676594 -0.8521620 -0.8475514 -0.7761684 -0.5509251  0.4186840 
##       gear         am         vs       drat        mpg 
##  0.4802848  0.5998324  0.6640389  0.6811719  1.0000000
cor(mtcars$cyl,mtcars$disp)
## [1] 0.9020329
cor(mtcars$wt,mtcars$disp)
## [1] 0.8879799
cor(mtcars$cyl,mtcars$hp)
## [1] 0.8324475
cor(mtcars$disp,mtcars$hp)
## [1] 0.7909486
cor(mtcars$wt,mtcars$cyl)
## [1] 0.7824958
cor(mtcars$wt,mtcars$hp)
## [1] 0.6587479

lm02 - Fit model with weight and transmission predicting mpg

lm02 <- lm(mpg ~ wt + factor(am), data = mtcars)
summary(lm02)
## 
## Call:
## lm(formula = mpg ~ wt + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5295 -2.3619 -0.1317  1.4025  6.8782 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 37.32155    3.05464  12.218 5.84e-13 ***
## wt          -5.35281    0.78824  -6.791 1.87e-07 ***
## factor(am)1 -0.02362    1.54565  -0.015    0.988    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.098 on 29 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7358 
## F-statistic: 44.17 on 2 and 29 DF,  p-value: 1.579e-09
pred_r_squared(lm02)
## [1] 0.6874984
library(car)
vif(lm02)
##         wt factor(am) 
##   1.921413   1.921413
par(mfrow=c(2,2))
plot(lm02)

# mpg against wt, coloured by transmission. am is mapped as a factor so the
# two transmission types get a discrete legend instead of a continuous colour
# gradient. The red line uses lm02's intercept and wt slope, i.e. the fitted
# line for the baseline level am = 0.
lm02_coef <- coef(lm02)
q02 <- ggplot(mtcars, aes(x = wt, y = mpg, colour = factor(am))) +
  geom_point() +
  geom_abline(
    intercept = lm02_coef[["(Intercept)"]],
    slope = lm02_coef[["wt"]],
    colour = "red"
  )
q02

lm03 - Fit model with cylinders and transmission predicting mpg

lm03 <- lm(mpg ~ cyl + factor(am), data = mtcars)
summary(lm03)
## 
## Call:
## lm(formula = mpg ~ cyl + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6856 -1.7172 -0.2657  1.8838  6.8144 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  34.5224     2.6032  13.262 7.69e-14 ***
## cyl          -2.5010     0.3608  -6.931 1.28e-07 ***
## factor(am)1   2.5670     1.2914   1.988   0.0564 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.059 on 29 degrees of freedom
## Multiple R-squared:  0.759,  Adjusted R-squared:  0.7424 
## F-statistic: 45.67 on 2 and 29 DF,  p-value: 1.094e-09
pred_r_squared(lm03)
## [1] 0.7082706
library(car)
vif(lm03)
##        cyl factor(am) 
##   1.375739   1.375739
par(mfrow=c(2,2))
plot(lm03)

# mpg against cyl, coloured by transmission. am is mapped as a factor so the
# two transmission types get a discrete legend instead of a continuous colour
# gradient. The red line uses lm03's intercept and cyl slope, i.e. the fitted
# line for the baseline level am = 0.
lm03_coef <- coef(lm03)
q03 <- ggplot(mtcars, aes(x = cyl, y = mpg, colour = factor(am))) +
  geom_point() +
  geom_abline(
    intercept = lm03_coef[["(Intercept)"]],
    slope = lm03_coef[["cyl"]],
    colour = "red"
  )
q03

lm04 - Fit model with disp and transmission predicting mpg

lm04 <- lm(mpg ~ disp + factor(am), data = mtcars)
summary(lm04)
## 
## Call:
## lm(formula = mpg ~ disp + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6382 -2.4751 -0.5631  2.2333  6.8386 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 27.848081   1.834071  15.184 2.45e-15 ***
## disp        -0.036851   0.005782  -6.373 5.75e-07 ***
## factor(am)1  1.833458   1.436100   1.277    0.212    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.218 on 29 degrees of freedom
## Multiple R-squared:  0.7333, Adjusted R-squared:  0.7149 
## F-statistic: 39.87 on 2 and 29 DF,  p-value: 4.749e-09
pred_r_squared(lm04)
## [1] 0.6770371
library(car)
vif(lm04)
##       disp factor(am) 
##   1.537396   1.537396
par(mfrow=c(2,2))
plot(lm04)

# mpg against disp, coloured by transmission. am is mapped as a factor so the
# two transmission types get a discrete legend instead of a continuous colour
# gradient. The red line uses lm04's intercept and disp slope, i.e. the fitted
# line for the baseline level am = 0.
lm04_coef <- coef(lm04)
q04 <- ggplot(mtcars, aes(x = disp, y = mpg, colour = factor(am))) +
  geom_point() +
  geom_abline(
    intercept = lm04_coef[["(Intercept)"]],
    slope = lm04_coef[["disp"]],
    colour = "red"
  )
q04

lm05 - Fit model with wt, cyl, disp, hp and transmission predicting mpg

lm05 <- lm(mpg ~ wt + cyl + disp + hp + factor(am), data = mtcars)
summary(lm05)
## 
## Call:
## lm(formula = mpg ~ wt + cyl + disp + hp + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5952 -1.5864 -0.7157  1.2821  5.5725 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.20280    3.66910  10.412 9.08e-11 ***
## wt          -3.30262    1.13364  -2.913  0.00726 ** 
## cyl         -1.10638    0.67636  -1.636  0.11393    
## disp         0.01226    0.01171   1.047  0.30472    
## hp          -0.02796    0.01392  -2.008  0.05510 .  
## factor(am)1  1.55649    1.44054   1.080  0.28984    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.505 on 26 degrees of freedom
## Multiple R-squared:  0.8551, Adjusted R-squared:  0.8273 
## F-statistic:  30.7 on 5 and 26 DF,  p-value: 4.029e-10
pred_r_squared(lm05)
## [1] 0.7849818
library(car)
vif(lm05)
##         wt        cyl       disp         hp factor(am) 
##   6.079452   7.209456  10.401420   4.501859   2.553064
par(mfrow=c(2,2))
plot(lm05)

lm06 - Fit model with wt, hp and transmission predicting mpg

lm06 <- lm(mpg ~ wt + hp + factor(am), data = mtcars)
summary(lm06)
## 
## Call:
## lm(formula = mpg ~ wt + hp + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4221 -1.7924 -0.3788  1.2249  5.5317 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.002875   2.642659  12.867 2.82e-13 ***
## wt          -2.878575   0.904971  -3.181 0.003574 ** 
## hp          -0.037479   0.009605  -3.902 0.000546 ***
## factor(am)1  2.083710   1.376420   1.514 0.141268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared:  0.8399, Adjusted R-squared:  0.8227 
## F-statistic: 48.96 on 3 and 28 DF,  p-value: 2.908e-11
pred_r_squared(lm06)
## [1] 0.7878597
library(car)
vif(lm06)
##         wt         hp factor(am) 
##   3.774838   2.088124   2.271082
par(mfrow=c(2,2))
plot(lm06)

lm07 - Fit model with wt, cyl and transmission predicting mpg

lm07 <- lm(mpg ~ wt + cyl + factor(am), data = mtcars)
summary(lm07)
## 
## Call:
## lm(formula = mpg ~ wt + cyl + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1735 -1.5340 -0.5386  1.5864  6.0812 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  39.4179     2.6415  14.923 7.42e-15 ***
## wt           -3.1251     0.9109  -3.431  0.00189 ** 
## cyl          -1.5102     0.4223  -3.576  0.00129 ** 
## factor(am)1   0.1765     1.3045   0.135  0.89334    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.612 on 28 degrees of freedom
## Multiple R-squared:  0.8303, Adjusted R-squared:  0.8122 
## F-statistic: 45.68 on 3 and 28 DF,  p-value: 6.51e-11
pred_r_squared(lm07)
## [1] 0.7756775
library(car)
vif(lm07)
##         wt        cyl factor(am) 
##   3.609011   2.584066   1.924955
par(mfrow=c(2,2))
plot(lm07)

lm08 - Fit model with wt interacting with transmission predicting mpg

lm08 <- lm(mpg ~ wt * factor(am), data = mtcars)
summary(lm08)
## 
## Call:
## lm(formula = mpg ~ wt * factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6004 -1.5446 -0.5325  0.9012  6.0909 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     31.4161     3.0201  10.402 4.00e-11 ***
## wt              -3.7859     0.7856  -4.819 4.55e-05 ***
## factor(am)1     14.8784     4.2640   3.489  0.00162 ** 
## wt:factor(am)1  -5.2984     1.4447  -3.667  0.00102 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.591 on 28 degrees of freedom
## Multiple R-squared:  0.833,  Adjusted R-squared:  0.8151 
## F-statistic: 46.57 on 3 and 28 DF,  p-value: 5.209e-11
pred_r_squared(lm08)
## [1] 0.7857928
library(car)
vif(lm08)
##            wt    factor(am) wt:factor(am) 
##      2.728248     20.901259     15.366853
par(mfrow=c(2,2))
plot(lm08)

lm09 - Fit model with wt interacting with transmission + qsec predicting mpg

lm09 <- lm(mpg ~ wt * am + qsec, data=mtcars)
summary(lm09)
## 
## Call:
## lm(formula = mpg ~ wt * am + qsec, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5076 -1.3801 -0.5588  1.0630  4.3684 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    9.723      5.899   1.648 0.110893    
## wt            -2.937      0.666  -4.409 0.000149 ***
## am            14.079      3.435   4.099 0.000341 ***
## qsec           1.017      0.252   4.035 0.000403 ***
## wt:am         -4.141      1.197  -3.460 0.001809 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.084 on 27 degrees of freedom
## Multiple R-squared:  0.8959, Adjusted R-squared:  0.8804 
## F-statistic: 58.06 on 4 and 27 DF,  p-value: 7.168e-13
pred_r_squared(lm09)
## [1] 0.8535095
library(car)
vif(lm09)
##        wt        am      qsec     wt:am 
##  3.030963 20.970925  1.447406 16.302453
par(mfrow=c(2,2))
plot(lm09)

lm10 - Fit weight, transmission, and their interaction written out explicitly (the same model as lm08, so the results are identical)

lm10 <- lm(mpg ~ wt + factor(am) + wt * factor(am), data=mtcars)
summary(lm10)
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + wt * factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6004 -1.5446 -0.5325  0.9012  6.0909 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     31.4161     3.0201  10.402 4.00e-11 ***
## wt              -3.7859     0.7856  -4.819 4.55e-05 ***
## factor(am)1     14.8784     4.2640   3.489  0.00162 ** 
## wt:factor(am)1  -5.2984     1.4447  -3.667  0.00102 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.591 on 28 degrees of freedom
## Multiple R-squared:  0.833,  Adjusted R-squared:  0.8151 
## F-statistic: 46.57 on 3 and 28 DF,  p-value: 5.209e-11
pred_r_squared(lm10)
## [1] 0.7857928
library(car)
vif(lm10)
##            wt    factor(am) wt:factor(am) 
##      2.728248     20.901259     15.366853
par(mfrow=c(2,2))
plot(lm10)

lm11 - Fit weight, transmission, and cyl (cyl as a factor)

lm11 <- lm(mpg ~ wt + factor(am) + factor(cyl), data=mtcars)
summary(lm11)
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(cyl), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4898 -1.3116 -0.5039  1.4162  5.7758 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.7536     2.8135  11.997  2.5e-12 ***
## wt            -3.1496     0.9080  -3.469  0.00177 ** 
## factor(am)1    0.1501     1.3002   0.115  0.90895    
## factor(cyl)6  -4.2573     1.4112  -3.017  0.00551 ** 
## factor(cyl)8  -6.0791     1.6837  -3.611  0.00123 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.603 on 27 degrees of freedom
## Multiple R-squared:  0.8375, Adjusted R-squared:  0.8134 
## F-statistic: 34.79 on 4 and 27 DF,  p-value: 2.73e-10
pred_r_squared(lm11)
## [1] 0.7765213
library(car)
vif(lm11)
##                 GVIF Df GVIF^(1/(2*Df))
## wt          3.611208  1        1.900318
## factor(am)  1.925620  1        1.387667
## factor(cyl) 2.585745  2        1.268079
par(mfrow=c(2,2))
plot(lm11)

lm12 - Fit weight and hp

lm12 <- lm(mpg ~ wt + hp, data=mtcars)
summary(lm12)
## 
## Call:
## lm(formula = mpg ~ wt + hp, data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.941 -1.600 -0.182  1.050  5.854 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 37.22727    1.59879  23.285  < 2e-16 ***
## wt          -3.87783    0.63273  -6.129 1.12e-06 ***
## hp          -0.03177    0.00903  -3.519  0.00145 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.593 on 29 degrees of freedom
## Multiple R-squared:  0.8268, Adjusted R-squared:  0.8148 
## F-statistic: 69.21 on 2 and 29 DF,  p-value: 9.109e-12
pred_r_squared(lm12)
## [1] 0.7810871
library(car)
vif(lm12)
##       wt       hp 
## 1.766625 1.766625
par(mfrow=c(2,2))
plot(lm12)

lm13 - Fit weight and carb

lm13 <- lm(mpg ~ wt + carb, data=mtcars)
summary(lm13)
## 
## Call:
## lm(formula = mpg ~ wt + carb, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5206 -2.1223 -0.0467  1.4551  5.9736 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.7300     1.7602  21.435  < 2e-16 ***
## wt           -4.7646     0.5765  -8.265 4.12e-09 ***
## carb         -0.8215     0.3492  -2.353   0.0256 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.839 on 29 degrees of freedom
## Multiple R-squared:  0.7924, Adjusted R-squared:  0.7781 
## F-statistic: 55.36 on 2 and 29 DF,  p-value: 1.255e-10
pred_r_squared(lm13)
## [1] 0.7461679
library(car)
vif(lm13)
##       wt     carb 
## 1.223761 1.223761
par(mfrow=c(2,2))
plot(lm13)

lm14 - Fit weight and disp

lm14 <- lm(mpg ~ wt + disp, data=mtcars)
summary(lm14)
## 
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4087 -2.3243 -0.7683  1.7721  6.3484 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.96055    2.16454  16.151 4.91e-16 ***
## wt          -3.35082    1.16413  -2.878  0.00743 ** 
## disp        -0.01773    0.00919  -1.929  0.06362 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.917 on 29 degrees of freedom
## Multiple R-squared:  0.7809, Adjusted R-squared:  0.7658 
## F-statistic: 51.69 on 2 and 29 DF,  p-value: 2.744e-10
pred_r_squared(lm14)
## [1] 0.725321
library(car)
vif(lm14)
##       wt     disp 
## 4.728319 4.728319
par(mfrow=c(2,2))
plot(lm14)

lm15 - Fit weight and factor(vs)

lm15 <- lm(mpg ~ wt + factor(vs), data=mtcars)
summary(lm15)
## 
## Call:
## lm(formula = mpg ~ wt + factor(vs), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7071 -2.4415 -0.3129  1.4319  6.0156 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  33.0042     2.3554  14.012 1.92e-14 ***
## wt           -4.4428     0.6134  -7.243 5.63e-08 ***
## factor(vs)1   3.1544     1.1907   2.649   0.0129 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.78 on 29 degrees of freedom
## Multiple R-squared:  0.801,  Adjusted R-squared:  0.7873 
## F-statistic: 58.36 on 2 and 29 DF,  p-value: 6.818e-11
pred_r_squared(lm15)
## [1] 0.7530933
library(car)
vif(lm15)
##         wt factor(vs) 
##   1.444943   1.444943
par(mfrow=c(2,2))
plot(lm15)

lm16 - Fit hp and factor(vs)

lm16 <- lm(mpg ~ hp + factor(vs), data=mtcars)
summary(lm16)
## 
## Call:
## lm(formula = mpg ~ hp + factor(vs), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.7131 -2.3336 -0.1332  1.9055  7.9055 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 26.96300    2.89069   9.328 3.13e-10 ***
## hp          -0.05453    0.01448  -3.766 0.000752 ***
## factor(vs)1  2.57622    1.96966   1.308 0.201163    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.818 on 29 degrees of freedom
## Multiple R-squared:  0.6246, Adjusted R-squared:  0.5987 
## F-statistic: 24.12 on 2 and 29 DF,  p-value: 6.768e-07
pred_r_squared(lm16)
## [1] 0.5082001
library(car)
vif(lm16)
##         hp factor(vs) 
##    2.09586    2.09586
par(mfrow=c(2,2))
plot(lm16)

lm17 - Fit disp and factor(vs)

lm17 <- lm(mpg ~ disp + factor(vs), data=mtcars)
summary(lm17)
## 
## Call:
## lm(formula = mpg ~ disp + factor(vs), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4605 -2.0260 -0.6467  1.7285  7.0790 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 27.949282   2.201166  12.697 2.27e-13 ***
## disp        -0.036896   0.006715  -5.494 6.43e-06 ***
## factor(vs)1  1.495004   1.651290   0.905    0.373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.261 on 29 degrees of freedom
## Multiple R-squared:  0.7261, Adjusted R-squared:  0.7072 
## F-statistic: 38.44 on 2 and 29 DF,  p-value: 7.005e-09
pred_r_squared(lm17)
## [1] 0.66982
library(car)
vif(lm17)
##       disp factor(vs) 
##   2.018941   2.018941
par(mfrow=c(2,2))
plot(lm17)

Results of stepwise selection using MASS stepAIC

library(MASS)
fit<- lm(mpg~cyl+disp+hp+drat+wt+qsec+gear+carb,data=mtcars)
step <- stepAIC(fit, direction="both")
## Start:  AIC=69.12
## mpg ~ cyl + disp + hp + drat + wt + qsec + gear + carb
## 
##        Df Sum of Sq    RSS    AIC
## - carb  1    0.6725 158.80 67.260
## - cyl   1    1.2886 159.41 67.384
## - drat  1    3.2639 161.39 67.778
## - disp  1    3.7211 161.84 67.869
## - qsec  1    5.2229 163.35 68.164
## - gear  1    5.3164 163.44 68.183
## - hp    1    6.9592 165.08 68.503
## <none>              158.12 69.125
## - wt    1   30.4892 188.61 72.767
## 
## Step:  AIC=67.26
## mpg ~ cyl + disp + hp + drat + wt + qsec + gear
## 
##        Df Sum of Sq    RSS    AIC
## - cyl   1     2.055 160.85 65.672
## - drat  1     2.810 161.61 65.822
## - gear  1     4.681 163.48 66.190
## - qsec  1     7.390 166.19 66.716
## <none>              158.80 67.260
## - disp  1    11.211 170.01 67.443
## - hp    1    12.981 171.78 67.775
## + carb  1     0.673 158.12 69.125
## - wt    1    76.291 235.09 77.815
## 
## Step:  AIC=65.67
## mpg ~ disp + hp + drat + wt + qsec + gear
## 
##        Df Sum of Sq    RSS    AIC
## - drat  1     4.936 165.79 64.639
## - gear  1     9.278 170.13 65.466
## - disp  1     9.846 170.70 65.573
## <none>              160.85 65.672
## - qsec  1    17.833 178.69 67.036
## - hp    1    18.419 179.27 67.141
## + cyl   1     2.055 158.80 67.260
## + carb  1     1.439 159.41 67.384
## - wt    1    84.464 245.32 77.178
## 
## Step:  AIC=64.64
## mpg ~ disp + hp + wt + qsec + gear
## 
##        Df Sum of Sq    RSS    AIC
## - disp  1     8.692 174.48 64.274
## <none>              165.79 64.639
## + drat  1     4.936 160.85 65.672
## + cyl   1     4.182 161.61 65.822
## - qsec  1    17.694 183.48 65.884
## - gear  1    19.848 185.63 66.258
## - hp    1    20.149 185.94 66.310
## + carb  1     0.890 164.90 66.467
## - wt    1    91.501 257.29 76.703
## 
## Step:  AIC=64.27
## mpg ~ hp + wt + qsec + gear
## 
##        Df Sum of Sq    RSS    AIC
## - qsec  1    11.057 185.54 64.241
## <none>              174.48 64.274
## - gear  1    11.579 186.06 64.331
## + disp  1     8.692 165.79 64.639
## - hp    1    13.505 187.99 64.660
## + carb  1     6.845 167.63 64.994
## + drat  1     3.783 170.70 65.573
## + cyl   1     1.973 172.51 65.911
## - wt    1   105.091 279.57 77.361
## 
## Step:  AIC=64.24
## mpg ~ hp + wt + gear
## 
##        Df Sum of Sq    RSS    AIC
## - gear  1     9.510 195.05 63.840
## <none>              185.54 64.241
## + qsec  1    11.057 174.48 64.274
## + carb  1    10.850 174.69 64.312
## + cyl   1     9.773 175.76 64.509
## + drat  1     4.176 181.36 65.512
## + disp  1     2.056 183.48 65.884
## - hp    1    91.647 277.19 75.086
## - wt    1    94.554 280.09 75.420
## 
## Step:  AIC=63.84
## mpg ~ hp + wt
## 
##        Df Sum of Sq    RSS    AIC
## + cyl   1    18.427 176.62 62.665
## <none>              195.05 63.840
## + drat  1    11.366 183.68 63.919
## + gear  1     9.510 185.54 64.241
## + qsec  1     8.988 186.06 64.331
## + carb  1     0.300 194.75 65.791
## + disp  1     0.057 194.99 65.831
## - hp    1    83.274 278.32 73.217
## - wt    1   252.627 447.67 88.427
## 
## Step:  AIC=62.66
## mpg ~ hp + wt + cyl
## 
##        Df Sum of Sq    RSS    AIC
## <none>              176.62 62.665
## - hp    1    14.551 191.17 63.198
## + disp  1     6.176 170.44 63.526
## - cyl   1    18.427 195.05 63.840
## + carb  1     2.519 174.10 64.205
## + drat  1     2.245 174.38 64.255
## + qsec  1     1.401 175.22 64.410
## + gear  1     0.856 175.76 64.509
## - wt    1   115.354 291.98 76.750
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## mpg ~ cyl + disp + hp + drat + wt + qsec + gear + carb
## 
## Final Model:
## mpg ~ hp + wt + cyl
## 
## 
##     Step Df   Deviance Resid. Df Resid. Dev      AIC
## 1                             23   158.1237 69.12453
## 2 - carb  1  0.6725191        24   158.7962 67.26034
## 3  - cyl  1  2.0553287        25   160.8515 65.67186
## 4 - drat  1  4.9362500        26   165.7878 64.63912
## 5 - disp  1  8.6924752        27   174.4802 64.27442
## 6 - qsec  1 11.0574885        28   185.5377 64.24071
## 7 - gear  1  9.5100407        29   195.0478 63.84027
## 8  + cyl  1 18.4272345        28   176.6205 62.66456

lm18 - Fit hp, wt, and cyl

lm18 <- lm(mpg ~ hp + wt + cyl, data=mtcars)
summary(lm18)
## 
## Call:
## lm(formula = mpg ~ hp + wt + cyl, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9290 -1.5598 -0.5311  1.1850  5.8986 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.75179    1.78686  21.687  < 2e-16 ***
## hp          -0.01804    0.01188  -1.519 0.140015    
## wt          -3.16697    0.74058  -4.276 0.000199 ***
## cyl         -0.94162    0.55092  -1.709 0.098480 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.512 on 28 degrees of freedom
## Multiple R-squared:  0.8431, Adjusted R-squared:  0.8263 
## F-statistic: 50.17 on 3 and 28 DF,  p-value: 2.184e-11
pred_r_squared(lm18)
## [1] 0.7956775
library(car)
vif(lm18)
##       hp       wt      cyl 
## 3.258481 2.580486 4.757456
par(mfrow=c(2,2))
plot(lm18)

Some models with multiple factor variables, followed by models treating cyl, gear, and carb as continuous variables

summary(lm(mpg~wt+factor(am)+factor(vs),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(vs), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7733 -2.2519 -0.3445  1.4129  5.6594 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  30.0787     3.7480   8.025 9.71e-09 ***
## wt           -3.7845     0.8981  -4.214 0.000236 ***
## factor(am)1   1.4913     1.4863   1.003 0.324262    
## factor(vs)1   3.6150     1.2761   2.833 0.008454 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.779 on 28 degrees of freedom
## Multiple R-squared:  0.8079, Adjusted R-squared:  0.7873 
## F-statistic: 39.25 on 3 and 28 DF,  p-value: 3.659e-10
summary(lm(mpg~wt+factor(am)+factor(cyl),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(cyl), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4898 -1.3116 -0.5039  1.4162  5.7758 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   33.7536     2.8135  11.997  2.5e-12 ***
## wt            -3.1496     0.9080  -3.469  0.00177 ** 
## factor(am)1    0.1501     1.3002   0.115  0.90895    
## factor(cyl)6  -4.2573     1.4112  -3.017  0.00551 ** 
## factor(cyl)8  -6.0791     1.6837  -3.611  0.00123 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.603 on 27 degrees of freedom
## Multiple R-squared:  0.8375, Adjusted R-squared:  0.8134 
## F-statistic: 34.79 on 4 and 27 DF,  p-value: 2.73e-10
summary(lm(mpg~wt+factor(am)+factor(gear),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(gear), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5798 -2.4056 -0.3692  1.8198  5.7713 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    35.0955     3.1862  11.015 1.72e-11 ***
## wt             -4.8782     0.7945  -6.140 1.46e-06 ***
## factor(am)1     0.1883     1.9942   0.094    0.925    
## factor(gear)4   2.0769     1.7343   1.198    0.242    
## factor(gear)5  -1.0615     2.3845  -0.445    0.660    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.968 on 27 degrees of freedom
## Multiple R-squared:  0.7888, Adjusted R-squared:  0.7575 
## F-statistic:  25.2 on 4 and 27 DF,  p-value: 8.931e-09
summary(lm(mpg~wt+factor(am)+factor(carb),data=mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(carb), data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.301 -1.906  0.000  1.381  5.179 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     32.688      3.669   8.910 4.45e-09 ***
## wt              -3.523      1.058  -3.329   0.0028 ** 
## factor(am)1      2.498      1.827   1.367   0.1843    
## factor(carb)2   -1.201      1.495  -0.804   0.4295    
## factor(carb)3   -2.789      2.271  -1.228   0.2312    
## factor(carb)4   -3.917      1.875  -2.089   0.0475 *  
## factor(carb)6   -5.727      3.354  -1.707   0.1007    
## factor(carb)8   -7.609      3.670  -2.073   0.0491 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.987 on 24 degrees of freedom
## Multiple R-squared:  0.8098, Adjusted R-squared:  0.7543 
## F-statistic:  14.6 on 7 and 24 DF,  p-value: 2.913e-07
# mpg on weight, cyl (entered as a numeric term) and transmission (am).
summary(lm(mpg ~ wt + cyl + factor(am), data = mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + cyl + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1735 -1.5340 -0.5386  1.5864  6.0812 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  39.4179     2.6415  14.923 7.42e-15 ***
## wt           -3.1251     0.9109  -3.431  0.00189 ** 
## cyl          -1.5102     0.4223  -3.576  0.00129 ** 
## factor(am)1   0.1765     1.3045   0.135  0.89334    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.612 on 28 degrees of freedom
## Multiple R-squared:  0.8303, Adjusted R-squared:  0.8122 
## F-statistic: 45.68 on 3 and 28 DF,  p-value: 6.51e-11
# mpg on weight, hp and transmission (am).
summary(lm(mpg ~ wt + hp + factor(am), data = mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + hp + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4221 -1.7924 -0.3788  1.2249  5.5317 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.002875   2.642659  12.867 2.82e-13 ***
## wt          -2.878575   0.904971  -3.181 0.003574 ** 
## hp          -0.037479   0.009605  -3.902 0.000546 ***
## factor(am)1  2.083710   1.376420   1.514 0.141268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared:  0.8399, Adjusted R-squared:  0.8227 
## F-statistic: 48.96 on 3 and 28 DF,  p-value: 2.908e-11
# mpg on weight, gear (entered as a numeric term) and transmission (am).
summary(lm(mpg ~ wt + gear + factor(am), data = mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + gear + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1663 -2.4342 -0.2539  1.5132  6.6583 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  39.2114     5.2849   7.420 4.43e-08 ***
## wt           -5.3798     0.8017  -6.710 2.77e-07 ***
## gear         -0.5570     1.2619  -0.441    0.662    
## factor(am)1   0.5938     2.1009   0.283    0.780    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.142 on 28 degrees of freedom
## Multiple R-squared:  0.7545, Adjusted R-squared:  0.7282 
## F-statistic: 28.69 on 3 and 28 DF,  p-value: 1.097e-08
# mpg on weight, carb (entered as a numeric term) and transmission (am).
summary(lm(mpg ~ wt + carb + factor(am), data = mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + carb + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5856 -2.1105  0.1393  1.5248  5.1851 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  34.0163     2.9713  11.448 4.49e-12 ***
## wt           -3.6340     0.9281  -3.915 0.000527 ***
## carb         -1.1593     0.4063  -2.853 0.008046 ** 
## factor(am)1   2.5263     1.6479   1.533 0.136490    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.775 on 28 degrees of freedom
## Multiple R-squared:  0.8085, Adjusted R-squared:  0.788 
## F-statistic: 39.41 on 3 and 28 DF,  p-value: 3.5e-10
# Overall F statistic (value, numerator df, denominator df) of each
# single-predictor model of mpg, one predictor per statement.
summary(lm(mpg ~ wt, data = mtcars))[["fstatistic"]]
##    value    numdf    dendf 
## 91.37533  1.00000 30.00000
summary(lm(mpg ~ cyl, data = mtcars))[["fstatistic"]]
##    value    numdf    dendf 
## 79.56103  1.00000 30.00000
summary(lm(mpg ~ disp, data = mtcars))[["fstatistic"]]
##    value    numdf    dendf 
## 76.51266  1.00000 30.00000
summary(lm(mpg ~ hp, data = mtcars))[["fstatistic"]]
##   value   numdf   dendf 
## 45.4598  1.0000 30.0000
summary(lm(mpg ~ drat, data = mtcars))[["fstatistic"]]
##    value    numdf    dendf 
## 25.96964  1.00000 30.00000
summary(lm(mpg ~ vs, data = mtcars))[["fstatistic"]]
##    value    numdf    dendf 
## 23.66224  1.00000 30.00000
summary(lm(mpg ~ am, data = mtcars))[["fstatistic"]]
##    value    numdf    dendf 
## 16.86028  1.00000 30.00000
summary(lm(mpg ~ carb, data = mtcars))[["fstatistic"]]
##    value    numdf    dendf 
## 13.07365  1.00000 30.00000
summary(lm(mpg ~ gear, data = mtcars))[["fstatistic"]]
##     value     numdf     dendf 
##  8.995144  1.000000 30.000000
summary(lm(mpg ~ qsec, data = mtcars))[["fstatistic"]]
##     value     numdf     dendf 
##  6.376702  1.000000 30.000000

The F statistic of a single-predictor model decreases as the strength of that predictor's correlation with mpg decreases.

wt, cyl, disp, and hp are all correlated with each other.

Nested modeling

# Nested sequence of models: start from weight alone and add predictors in
# groups, each model extending the previous one, then compare them with
# sequential F tests.
nm01 <- lm(mpg ~ wt, data = mtcars)
nm03 <- update(nm01, . ~ . + cyl + disp)
nm05 <- update(nm03, . ~ . + hp + drat)
nm07 <- update(nm05, . ~ . + factor(vs) + factor(am))
nm09 <- update(nm07, . ~ . + carb + gear)
nm10 <- update(nm09, . ~ . + qsec)
anova(nm01, nm03, nm05, nm07, nm09, nm10)
## Analysis of Variance Table
## 
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + cyl + disp
## Model 3: mpg ~ wt + cyl + disp + hp + drat
## Model 4: mpg ~ wt + cyl + disp + hp + drat + factor(vs) + factor(am)
## Model 5: mpg ~ wt + cyl + disp + hp + drat + factor(vs) + factor(am) + 
##     carb + gear
## Model 6: mpg ~ wt + cyl + disp + hp + drat + factor(vs) + factor(am) + 
##     carb + gear + qsec
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1     30 278.32                                
## 2     28 188.49  2    89.830 6.3949 0.006777 **
## 3     26 167.43  2    21.066 1.4997 0.246152   
## 4     24 158.65  2     8.772 0.6245 0.545206   
## 5     22 156.36  2     2.296 0.1634 0.850292   
## 6     21 147.49  1     8.864 1.2621 0.273941   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Second nested sequence, leaving cyl out: weight alone, then disp and hp,
# then drat and qsec; compared with sequential F tests.
nnm01 <- lm(mpg ~ wt, data = mtcars)
nnm03 <- update(nnm01, . ~ . + disp + hp)
nnm05 <- update(nnm03, . ~ . + drat + qsec)
anova(nnm01, nnm03, nnm05)
## Analysis of Variance Table
## 
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + disp + hp
## Model 3: mpg ~ wt + disp + hp + drat + qsec
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1     30 278.32                                
## 2     28 194.99  2    83.331 6.3676 0.005614 **
## 3     26 170.13  2    24.862 1.8997 0.169802   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Third comparison: weight alone, then cyl and disp added, then a model with
# cyl, transmission (am), hp and drat.
# All three models are derived from nnnm01, the baseline fitted here.
# NOTE: nnnm05 omits disp, so it does not contain nnnm03 as a sub-model; the
# F test on the last row of the anova table is therefore not a comparison of
# nested models and should be read with caution.
nnnm01 <- lm(mpg ~ wt, data = mtcars)
nnnm03 <- update(nnnm01, mpg ~ wt + cyl + disp)
nnnm05 <- update(nnnm01, mpg ~ wt + cyl + factor(am) + hp + drat)
anova(nnnm01, nnnm03, nnnm05)
## Analysis of Variance Table
## 
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + cyl + disp
## Model 3: mpg ~ wt + cyl + factor(am) + hp + drat
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1     30 278.32                                
## 2     28 188.49  2    89.830 6.8848 0.003986 **
## 3     26 169.62  2    18.875 1.4466 0.253694   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

lm19 - Fit wt, disp and hp

# Candidate model lm19: mpg explained by weight, disp and hp.
lm19 <- lm(formula = mpg ~ wt + disp + hp, data = mtcars)
summary(lm19)
## 
## Call:
## lm(formula = mpg ~ wt + disp + hp, data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.891 -1.640 -0.172  1.061  5.861 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 37.105505   2.110815  17.579  < 2e-16 ***
## wt          -3.800891   1.066191  -3.565  0.00133 ** 
## disp        -0.000937   0.010350  -0.091  0.92851    
## hp          -0.031157   0.011436  -2.724  0.01097 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.639 on 28 degrees of freedom
## Multiple R-squared:  0.8268, Adjusted R-squared:  0.8083 
## F-statistic: 44.57 on 3 and 28 DF,  p-value: 8.65e-11
# Predicted R-squared for lm19.
pred_r_squared(linear.model = lm19)
## [1] 0.7678953
# Variance inflation factors (car package) for the lm19 predictors.
library("car")
vif(mod = lm19)
##       wt     disp       hp 
## 4.844618 7.324517 2.736633
# Lay the plots produced by plot(lm19) out on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm19)

lm20 - Fit wt, disp, hp and transmission

# Candidate model lm20: lm19's predictors plus transmission (am) as a factor.
lm20 <- lm(formula = mpg ~ wt + disp + hp + factor(am), data = mtcars)
summary(lm20)
## 
## Call:
## lm(formula = mpg ~ wt + disp + hp + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4590 -1.6900 -0.3708  1.1301  5.5011 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.209443   2.822826  12.119 1.98e-12 ***
## wt          -3.046747   1.157119  -2.633  0.01383 *  
## disp         0.002489   0.010377   0.240  0.81222    
## hp          -0.039323   0.012434  -3.163  0.00384 ** 
## factor(am)1  2.159271   1.435176   1.505  0.14405    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.581 on 27 degrees of freedom
## Multiple R-squared:  0.8402, Adjusted R-squared:  0.8166 
## F-statistic:  35.5 on 4 and 27 DF,  p-value: 2.181e-10
# Predicted R-squared for lm20.
pred_r_squared(linear.model = lm20)
## [1] 0.7753741
# Variance inflation factors (car package) for the lm20 predictors.
library("car")
vif(mod = lm20)
##         wt       disp         hp factor(am) 
##   5.963704   7.695157   3.381008   2.386005
# Lay the plots produced by plot(lm20) out on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm20)

diagnostics

# dfbetas of lm20, rounded to 3 decimals and sorted ascending. sort() flattens
# the matrix, so the values print without observation or coefficient labels.
sort(round(dfbetas(lm20), digits = 3))
##   [1] -0.903 -0.456 -0.375 -0.346 -0.331 -0.303 -0.299 -0.298 -0.281 -0.264
##  [11] -0.261 -0.260 -0.257 -0.255 -0.255 -0.253 -0.218 -0.215 -0.206 -0.206
##  [21] -0.168 -0.168 -0.164 -0.162 -0.147 -0.144 -0.133 -0.126 -0.124 -0.114
##  [31] -0.113 -0.111 -0.110 -0.101 -0.095 -0.092 -0.091 -0.090 -0.089 -0.089
##  [41] -0.087 -0.083 -0.083 -0.075 -0.066 -0.062 -0.061 -0.060 -0.059 -0.057
##  [51] -0.054 -0.053 -0.051 -0.044 -0.041 -0.041 -0.039 -0.038 -0.037 -0.036
##  [61] -0.034 -0.034 -0.034 -0.033 -0.032 -0.031 -0.029 -0.023 -0.021 -0.018
##  [71] -0.017 -0.016 -0.016 -0.014 -0.011 -0.010 -0.008 -0.007 -0.005 -0.003
##  [81] -0.001  0.001  0.001  0.004  0.004  0.004  0.006  0.009  0.011  0.012
##  [91]  0.012  0.013  0.014  0.017  0.023  0.023  0.026  0.028  0.030  0.035
## [101]  0.039  0.040  0.041  0.045  0.046  0.048  0.051  0.053  0.055  0.059
## [111]  0.060  0.060  0.061  0.067  0.068  0.074  0.075  0.079  0.080  0.095
## [121]  0.097  0.097  0.098  0.106  0.107  0.109  0.109  0.113  0.118  0.120
## [131]  0.121  0.121  0.128  0.129  0.135  0.138  0.139  0.146  0.153  0.154
## [141]  0.161  0.181  0.186  0.192  0.194  0.194  0.197  0.197  0.199  0.220
## [151]  0.241  0.266  0.291  0.302  0.375  0.404  0.472  0.502  0.819  0.873
# Hat values of lm20 per car, rounded to 3 decimals, smallest to largest.
sort(round(hatvalues(lm20), digits = 3))
##          Merc 450SL         Merc 450SLC             Valiant 
##               0.071               0.072               0.076 
##          Datsun 710         AMC Javelin       Porsche 914-2 
##               0.089               0.093               0.094 
##           Mazda RX4          Merc 450SE    Dodge Challenger 
##               0.097               0.098               0.101 
##      Hornet 4 Drive           Fiat X1-9      Toyota Corolla 
##               0.104               0.107               0.109 
##            Fiat 128       Mazda RX4 Wag          Volvo 142E 
##               0.112               0.123               0.130 
##           Merc 240D            Merc 230          Camaro Z28 
##               0.142               0.143               0.146 
##            Merc 280           Merc 280C        Ferrari Dino 
##               0.149               0.149               0.149 
##         Honda Civic        Lotus Europa   Hornet Sportabout 
##               0.153               0.167               0.184 
##       Toyota Corona    Pontiac Firebird          Duster 360 
##               0.190               0.196               0.199 
##   Chrysler Imperial      Ford Pantera L  Cadillac Fleetwood 
##               0.231               0.271               0.273 
## Lincoln Continental       Maserati Bora 
##               0.277               0.503
# Scatterplot matrix of every pair of mtcars columns.
pairs(x = mtcars)

Start with the full model and remove the predictor with the highest p-value, one at a time, until all remaining predictor coefficients are significant

# Full model: every other mtcars column as a predictor of mpg, with vs and am
# entered as factors. Starting point for the backward elimination below.
full <- lm(
  mpg ~ cyl + disp + hp + drat + wt + qsec + factor(vs) + factor(am) +
    gear + carb,
  data = mtcars
)
summary(full)
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + factor(vs) + 
##     factor(am) + gear + carb, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4506 -1.6044 -0.1196  1.2193  4.6271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 12.30337   18.71788   0.657   0.5181  
## cyl         -0.11144    1.04502  -0.107   0.9161  
## disp         0.01334    0.01786   0.747   0.4635  
## hp          -0.02148    0.02177  -0.987   0.3350  
## drat         0.78711    1.63537   0.481   0.6353  
## wt          -3.71530    1.89441  -1.961   0.0633 .
## qsec         0.82104    0.73084   1.123   0.2739  
## factor(vs)1  0.31776    2.10451   0.151   0.8814  
## factor(am)1  2.52023    2.05665   1.225   0.2340  
## gear         0.65541    1.49326   0.439   0.6652  
## carb        -0.19942    0.82875  -0.241   0.8122  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared:  0.869,  Adjusted R-squared:  0.8066 
## F-statistic: 13.93 on 10 and 21 DF,  p-value: 3.793e-07
# Elimination step 1: refit without cyl (p = 0.9161 in the full model).
summary(lm(
  mpg ~ disp + hp + drat + wt + qsec + factor(vs) + factor(am) + gear + carb,
  data = mtcars
))
## 
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + factor(vs) + 
##     factor(am) + gear + carb, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4286 -1.5908 -0.0412  1.2120  4.5961 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 10.96007   13.53030   0.810   0.4266  
## disp         0.01283    0.01682   0.763   0.4538  
## hp          -0.02191    0.02091  -1.048   0.3062  
## drat         0.83520    1.53625   0.544   0.5921  
## wt          -3.69251    1.83954  -2.007   0.0572 .
## qsec         0.84244    0.68678   1.227   0.2329  
## factor(vs)1  0.38975    1.94800   0.200   0.8433  
## factor(am)1  2.57743    1.94035   1.328   0.1977  
## gear         0.71155    1.36562   0.521   0.6075  
## carb        -0.21958    0.78856  -0.278   0.7833  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.59 on 22 degrees of freedom
## Multiple R-squared:  0.8689, Adjusted R-squared:  0.8153 
## F-statistic: 16.21 on 9 and 22 DF,  p-value: 9.031e-08
# Elimination step 2: refit without factor(vs) (p = 0.8433 in the previous fit).
summary(lm(
  mpg ~ disp + hp + drat + wt + qsec + factor(am) + gear + carb,
  data = mtcars
))
## 
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + factor(am) + 
##     gear + carb, data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.356 -1.576 -0.149  1.218  4.604 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  9.76828   11.89230   0.821   0.4199  
## disp         0.01214    0.01612   0.753   0.4590  
## hp          -0.02095    0.01993  -1.051   0.3040  
## drat         0.87510    1.49113   0.587   0.5630  
## wt          -3.71151    1.79834  -2.064   0.0505 .
## qsec         0.91083    0.58312   1.562   0.1319  
## factor(am)1  2.52390    1.88128   1.342   0.1928  
## gear         0.75984    1.31577   0.577   0.5692  
## carb        -0.24796    0.75933  -0.327   0.7470  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.535 on 23 degrees of freedom
## Multiple R-squared:  0.8687, Adjusted R-squared:  0.823 
## F-statistic: 19.02 on 8 and 23 DF,  p-value: 2.008e-08
# Elimination step 3: refit without carb (p = 0.7470 in the previous fit).
summary(lm(
  mpg ~ disp + hp + drat + wt + qsec + factor(am) + gear,
  data = mtcars
))
## 
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + factor(am) + 
##     gear, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1200 -1.7753 -0.1446  1.0903  4.7172 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  9.19763   11.54220   0.797  0.43334   
## disp         0.01552    0.01214   1.278  0.21342   
## hp          -0.02471    0.01596  -1.548  0.13476   
## drat         0.81023    1.45007   0.559  0.58151   
## wt          -4.13065    1.23593  -3.342  0.00272 **
## qsec         1.00979    0.48883   2.066  0.04981 * 
## factor(am)1  2.58980    1.83528   1.411  0.17104   
## gear         0.60644    1.20596   0.503  0.61964   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.488 on 24 degrees of freedom
## Multiple R-squared:  0.8681, Adjusted R-squared:  0.8296 
## F-statistic: 22.56 on 7 and 24 DF,  p-value: 4.218e-09
# Elimination step 4: refit without gear (p = 0.61964 in the previous fit).
summary(lm(
  mpg ~ disp + hp + drat + wt + qsec + factor(am),
  data = mtcars
))
## 
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + factor(am), 
##     data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2669 -1.6148 -0.2585  1.1220  4.5564 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 10.71062   10.97539   0.976  0.33848   
## disp         0.01310    0.01098   1.193  0.24405   
## hp          -0.02180    0.01465  -1.488  0.14938   
## drat         1.02065    1.36748   0.746  0.46240   
## wt          -4.04454    1.20558  -3.355  0.00254 **
## qsec         0.99073    0.48002   2.064  0.04955 * 
## factor(am)1  2.98469    1.63382   1.827  0.07969 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.45 on 25 degrees of freedom
## Multiple R-squared:  0.8667, Adjusted R-squared:  0.8347 
## F-statistic: 27.09 on 6 and 25 DF,  p-value: 8.637e-10
# Elimination step 5: refit without drat (p = 0.46240 in the previous fit).
summary(lm(mpg ~ disp + hp + wt + qsec + factor(am), data = mtcars))
## 
## Call:
## lm(formula = mpg ~ disp + hp + wt + qsec + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5399 -1.7398 -0.3196  1.1676  4.5534 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 14.36190    9.74079   1.474  0.15238   
## disp         0.01124    0.01060   1.060  0.29897   
## hp          -0.02117    0.01450  -1.460  0.15639   
## wt          -4.08433    1.19410  -3.420  0.00208 **
## qsec         1.00690    0.47543   2.118  0.04391 * 
## factor(am)1  3.47045    1.48578   2.336  0.02749 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.429 on 26 degrees of freedom
## Multiple R-squared:  0.8637, Adjusted R-squared:  0.8375 
## F-statistic: 32.96 on 5 and 26 DF,  p-value: 1.844e-10
# Elimination step 6: refit without disp (p = 0.29897 in the previous fit).
summary(lm(mpg ~ hp + wt + qsec + factor(am), data = mtcars))
## 
## Call:
## lm(formula = mpg ~ hp + wt + qsec + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4975 -1.5902 -0.1122  1.1795  4.5404 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 17.44019    9.31887   1.871  0.07215 . 
## hp          -0.01765    0.01415  -1.247  0.22309   
## wt          -3.23810    0.88990  -3.639  0.00114 **
## qsec         0.81060    0.43887   1.847  0.07573 . 
## factor(am)1  2.92550    1.39715   2.094  0.04579 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.435 on 27 degrees of freedom
## Multiple R-squared:  0.8579, Adjusted R-squared:  0.8368 
## F-statistic: 40.74 on 4 and 27 DF,  p-value: 4.589e-11
# Elimination step 7: refit without hp (p = 0.22309 in the previous fit).
summary(lm(mpg ~ wt + qsec + factor(am), data = mtcars))
## 
## Call:
## lm(formula = mpg ~ wt + qsec + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4811 -1.5555 -0.7257  1.4110  4.6610 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.6178     6.9596   1.382 0.177915    
## wt           -3.9165     0.7112  -5.507 6.95e-06 ***
## qsec          1.2259     0.2887   4.247 0.000216 ***
## factor(am)1   2.9358     1.4109   2.081 0.046716 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared:  0.8497, Adjusted R-squared:  0.8336 
## F-statistic: 52.75 on 3 and 28 DF,  p-value: 1.21e-11
# Diagnostic plots for the model left after the elimination steps.
plot(lm(mpg ~ wt + qsec + factor(am), data = mtcars))

lm21 - Fit wt, qsec and transmission

# Candidate model lm21: mpg explained by weight, qsec and transmission (am).
lm21 <- lm(formula = mpg ~ wt + qsec + factor(am), data = mtcars)
summary(lm21)
## 
## Call:
## lm(formula = mpg ~ wt + qsec + factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4811 -1.5555 -0.7257  1.4110  4.6610 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.6178     6.9596   1.382 0.177915    
## wt           -3.9165     0.7112  -5.507 6.95e-06 ***
## qsec          1.2259     0.2887   4.247 0.000216 ***
## factor(am)1   2.9358     1.4109   2.081 0.046716 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared:  0.8497, Adjusted R-squared:  0.8336 
## F-statistic: 52.75 on 3 and 28 DF,  p-value: 1.21e-11
# Predicted R-squared for lm21.
pred_r_squared(linear.model = lm21)
## [1] 0.7945881
# Variance inflation factors (car package) for the lm21 predictors.
library("car")
vif(mod = lm21)
##         wt       qsec factor(am) 
##   2.482952   1.364339   2.541437
# Lay the plots produced by plot(lm21) out on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm21)

The intercept is the predicted response when all predictors are 0. We are not interested in the case where wt and qsec equal 0.

lm22 - Fit wt, qsec and transmission; try centering wt and qsec due to insignificant intercept

# Candidate model lm22: same predictors as lm21, but wt and qsec are centered
# on their means inside the formula; mpg itself is left uncentered.
lm22 <- lm(
  mpg ~ I(wt - mean(wt)) + I(qsec - mean(qsec)) + factor(am),
  data = mtcars
)
summary(lm22)
## 
## Call:
## lm(formula = mpg ~ I(wt - mean(wt)) + I(qsec - mean(qsec)) + 
##     factor(am), data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4811 -1.5555 -0.7257  1.4110  4.6610 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           18.8979     0.7194  26.271  < 2e-16 ***
## I(wt - mean(wt))      -3.9165     0.7112  -5.507 6.95e-06 ***
## I(qsec - mean(qsec))   1.2259     0.2887   4.247 0.000216 ***
## factor(am)1            2.9358     1.4109   2.081 0.046716 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared:  0.8497, Adjusted R-squared:  0.8336 
## F-statistic: 52.75 on 3 and 28 DF,  p-value: 1.21e-11
# Predicted R-squared for lm22.
pred_r_squared(linear.model = lm22)
## [1] 0.7945881
# Variance inflation factors (car package) for the lm22 predictors.
library("car")
vif(mod = lm22)
##     I(wt - mean(wt)) I(qsec - mean(qsec))           factor(am) 
##             2.482952             1.364339             2.541437
# Lay the plots produced by plot(lm22) out on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm22)